version: 04 December, 2025

About this document


All analyses preformed with R version 4.5.2.

Basic setup of R environment


Loading required packages

For the following analyses we will require the use of a number of different R packages. We can use the following code to quickly load in the packages and install any packages not previously installed in the R console.

if (!require("pacman")) install.packages("pacman")

pacman::p_load_gh("pmartinezarbizu/pairwiseAdonis/pairwiseAdonis", "ropensci/rnaturalearthhires", "KarstensLab/microshades")

if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

# BiocManager::install("ggtree")
# BiocManager::install("ggtreeExtra")
# devtools::install_github("GuangchuangYu/ggtree")

pacman::p_load("cowplot", "car", "ggrepel", "ggspatial", "ggstance", "ggtreeExtra", "ggtree", "paletteer", "patchwork", "rnaturalearth", "sf", "Hmisc", "pairwiseAdonis", "RColorBrewer", "Redmonder", "flextable", "lubridate", "officer", "adegenet", "dendextend", "gdata", "ggdendro", "hierfstat", "kableExtra", "poppr", "reshape2", "StAMPP", "vcfR", "vegan", "boa", "magick", "sdmpredictors", "ggcorrplot", "tidyverse", "TeachingDemos", "LaplacesDemon", "adespatial", "ggnewscale", "ggbeeswarm", "multcomp", "rstatix", "R.utils", "graph4lg", "ordinal", "lme4", "ggeffects", "performance", "MuMIn", "r2glmm")

options("scipen" = 10)

# load("flXesto.RData")


Themes and colors

Making color palettes to use throughout all plots

flPal = paletteer_c(`"viridis::turbo"`, 9, direction = -1)[c(2:9)]

pink = "#FF6A8BFF"

purple = paletteer_d("vapoRwave::vapoRwave")[10]

xestoKColPal = c("#6A0E4F", "#FF2879", "#9671A6", "#E587A6", "#F9C3E1", "#B55A94")

Plot themes

dendTheme = theme(axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.line.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.title.y = element_text(size = 10, color = "black", angle = 90),
  axis.text.y = element_text(size = 8, color = "black"),
  axis.line.y = element_line(),
  axis.ticks.y = element_line(),
  panel.grid = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  legend.key.size = unit(0.75, "line"),
  legend.key = element_blank(),
  legend.title = element_text(size = 10),
  legend.text = element_text(size = 8),
  legend.direction = "horizontal",
  legend.box = "vertical",
  legend.position = c(0.13, 0.1))

pcaTheme = theme(axis.title.x = element_text(color = "black", size = 10),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.line.x = element_blank(),
        axis.title.y = element_text(color = "black", size = 10),
        axis.text.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.line.y = element_blank(),
        legend.position = "none",
        legend.title = element_text(size = 10),
        legend.text = element_text(size = 10),
        legend.key.size = unit(5, "pt"),
        panel.border = element_rect(color = "black", size = 1),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
  
admixTheme = theme(
  panel.grid = element_blank(),
  # panel.background = element_rect(fill = "gray85"),
  panel.background = element_blank(),
  plot.background = element_blank(),
  panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
  panel.spacing.x = grid:::unit(0.05, "lines"),
  panel.spacing.y = grid:::unit(0.05, "lines"),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.x = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title = element_blank(),
  strip.background.x = element_blank(),
  strip.background.y = element_blank(),
  strip.text = element_text(size = 8),
  strip.text.y.left = element_text(size = 10, angle = 90),
  strip.text.x.bottom = element_text(vjust = 1, color = NA),
  legend.key = element_blank(),
  legend.position = "none",
  legend.title = element_text(size = 8))

fstTheme = theme(
  axis.text.x = element_text(vjust = 3.5, size = 10, hjust = 0.5, color = "white"),
  axis.text.y = element_text(size = 10, color = "white", angle = 90, hjust = 0.5, vjust = -1.5),
  axis.title.x = element_blank(),
  axis.title.y = element_blank(),
  panel.grid.major = element_blank(),
  panel.border = element_blank(),
  axis.ticks = element_blank(),
  legend.title = element_text(size = 8, color = "black"),
  legend.text = element_text(size = 8, color = "black"),
  legend.position = c(0.6, 0.9),
  plot.background = element_blank(),
  panel.background = element_blank(),
)

violinTheme = theme(axis.title = element_text(color = "black", size = 12),
        axis.ticks.x = element_blank(),
        axis.text.x = element_text(color = "black", size = 10),
        legend.position = "none",
        legend.key.size = unit(0.3, 'cm'),
        panel.border = element_rect(color = "black", size = 1),
        panel.background = element_blank(),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())


Sampling info


Map of study sites


flSites = read.csv("../data/flXestoMetaData.csv", header = TRUE)[-c(7, 15, 16, 32, 125, 126, 165, 197, 198, 253, 254, 314, 315, 353, 354),]
flSites$depthZone = factor(flSites$depthZone)
flSites$depthZone = factor(flSites$depthZone, levels = levels(flSites$depthZone)[c(2,1)])

flSites$region = factor(flSites$region)
flSites$region = factor(flSites$region, levels = levels(flSites$region)[c(3, 8, 1, 2, 7, 4, 6, 5)])
flSites$date = mdy(flSites$date) %>% format("%d %b %Y")

flPops = flSites %>% group_by(region) %>% summarise(latDD = mean(latDD), longDD = mean(longDD), n = n()) %>% droplevels()

flSampleSites = flSites %>% group_by(region, site, depthZone) %>% summarise(latDD = min(latDD), longDD = min(longDD))
## `summarise()` has grouped output by 'region', 'site'. You can override using the
## `.groups` argument.
# fknmsBounds = read.csv("../data/shp/fknmsSPA.csv", header = TRUE)
fknmsBounds = read_sf("../data/shp/fknms_py.shp") %>% st_transform(crs = 4326)
coralECA = read_sf("../data/shp/SEFLCoralReefEcosystemConservationArea.shp") %>% st_transform(crs = 4326)
states = st_as_sf(ne_states(country = c("United States of America")), scale = "count",  crs = 4326) %>% filter(name_en %in% c("Florida", "Georgia", "Alabama"))
countries = st_as_sf(ne_countries(country = c("Cuba", "Mexico", "The Bahamas", "Bermuda"), scale = "Large"), crs = 4326)
bahamas = read_sf("../data/shp/bahamasShoreline.shp") %>% st_transform(crs = 4326)
cuba = read_sf("../data/shp/cubaShoreline.shp") %>% st_transform(crs = 4326)
florida = read_sf("../data/shp/floridaShoreline.shp") %>% st_transform(crs = 4326)
bathy = read_sf("../data/shp/flBathy.shp") %>% st_transform(crs = 4326) %>% subset(subset = DATASET %in% c("fl_shelf", "fl_coast"))
tortugasBathy = read_sf("../data/shp/tortugasBathy.shp") %>% st_transform(crs = 4326)

MPA = c("KJCAP", "FKNMS")
latDD = 26
longDD = -81

bounds = data.frame(MPA,latDD,longDD)


Next we build a hi-res polygon of FL with the study site marked and a zoomed in map of the colony locations. We use ggspatial to add a north arrow and scale bar to the main map.

purples = c("purple1", pink)

floridaMap = ggplot() +
   geom_point(data = bounds, aes(x = longDD, y = latDD, fill = MPA, color = MPA), alpha = 0.1, shape = 22) +
   scale_fill_manual(values = purples, guide = "none") +
   ggnewscale::new_scale_fill() +
   scale_color_manual(values = purples, name = "MPA") +
   geom_sf(data = fknmsBounds, fill = purples[1], color = purples[1], alpha = 0.1, linewidth = 0.5) +
   geom_sf(data = coralECA, fill = purples[2], color = purples[2], alpha = 0.1, linewidth = 0.5) +
   scale_fill_manual(values = flPal, name = "Site") +
  scale_shape_manual(values = c(21, 23), name = "Depth") +
  geom_sf(data = florida, fill = "white", linewidth = 0.15) +
  geom_sf(data = cuba, fill = "white", linewidth = 0.15) +
  geom_sf(data = bahamas, fill = "white", linewidth = 0.15) +
  geom_point(data = flSampleSites, aes(x = longDD, y = latDD, shape = depthZone, fill = region), color = "gray30", size = 3, alpha = 1) +
  coord_sf(xlim = c(-83.25, -80), ylim = c(24.25, 27.25)) +
  scale_x_continuous(breaks = c(seq(-83, -80, by = 1))) +
  scale_y_continuous(breaks = c(seq(23, 27, by = 1))) +
  annotation_scale(location = "br", pad_x = unit(1.35, "cm"), text_pad = unit(-4.5, "cm")) +
  guides(fill = guide_legend(override.aes = list(shape = 22, color = "gray30", size = 3), order = 1), shape = guide_legend(override.aes = list(size = c(2.25, 2), stroke = 0.25, color = "black"), order = 2), color = guide_legend(override.aes = list(shape = 22, size = 3, fill = purples, color = purples, alpha = 1), order = 3)) +
  theme_bw() +
  theme(panel.background = element_rect(fill = "aliceblue"),
        plot.background = element_blank(),
        panel.border = element_rect(color = "black", size = 1, fill = NA),
        axis.title = element_blank(),
        axis.ticks = element_line(color = "black"),
        axis.text = element_text(color = "black"),
        legend.position = c(0.105, 0.4),
        legend.box.background = element_rect(linewidth = 0.35, fill = "white"),
        legend.title = element_text(color = "black", size = 8),
        legend.text = element_text(color = "black", size = 8),
        legend.spacing = unit(-5, "pt"),
        legend.key.size = unit(5, "pt"),
        legend.background = element_blank()
        )

# floridaMap

largeMap = inset = ggplot() +
  geom_sf(data = states, fill = "white", linewidth = 0.3) +
  geom_sf(data = countries, fill = "white", linewidth = 0.3) +
  geom_rect(aes(xmin = -83.25, xmax = -80, ymin = 24.25, ymax = 27.25), color = "black", fill = NA, alpha = 0.25, linewidth = 0.5) +
  geom_rect(aes(xmin = -78.8, xmax = -77, ymin = 22.2, ymax = 22.6), fill = "aliceblue", color = NA) +
  annotation_scale(location = "bl", pad_x = unit(2.25, "cm")) +
  annotation_north_arrow(location = "tr", style = north_arrow_minimal(), pad_x = unit(-0.3, "cm")) +
  coord_sf(xlim = c(-87, -76), ylim = c(22, 31)) +
  theme_bw() +
  theme(legend.title = element_text(size = 9, face = "bold"),
        axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        panel.background = element_rect(fill = "aliceblue"),
        panel.border = element_rect(color = "black", size = 1, fill = NA),
        legend.position = "none",
        plot.background = element_blank())

# largeMap
map = (floridaMap + 
  inset_element(largeMap, top = 1.07, right = 0.33, bottom = 0.63, left = -0.005, ignore_tag = TRUE))
  
ggsave("../figures/figure1.png", plot = map, height = 7, width = 7, units = "in", dpi = 300)


X. muta population genetics from SNPs


Analyzing 2bRAD generated SNPs (XXX loci) for population structure//genetic connectivity across sites and depth zones in FL

How many reads?

xestoReads = read.delim("../data/xestoRawReadCounts", header = FALSE)
colnames(xestoReads) = c("sample", "reads")

head(xestoReads)
##     sample     reads
## 1 FK-Xm1-1  70484555
## 2 FK-Xm1-2 102922235
## 3 FK-Xm1-3  59128545
## 4 FK-Xm1-4  72743555
## 5 FK-Xm1-5 119901675
## 6 FK-Xm1-6  61086586
#total reads
sum(xestoReads$reads)
## [1] 2003083840
#average reads/sample
(sum(xestoReads$reads)/366)
## [1] 5472907
xestoFiltReads = read.delim("../data/xestoFiltReadCounts", header = FALSE)

colnames(xestoFiltReads) = c("sample", "reads")

head(xestoFiltReads)
##    sample   reads
## 1 fk_X001 1945262
## 2 fk_X002  347932
## 3 fk_X003 2704686
## 4 fk_X004 4198543
## 5 fk_X005 3855099
## 6 fk_X006 1978019
#total reads
sum(xestoFiltReads$reads)
## [1] 1345919357
#average reads/sample
(sum(xestoFiltReads$reads)/366)
## [1] 3677375

Identifiying clonal multi-locus genotypes

Dendrogram with clones

Identification of any natural clones using technical replicates as a baseline for clonality between samples.

cloneBams = read.csv("../data/flXestoMetaData.csv") %>% dplyr::select(-sampleID, -date, -species)# list of bam files

cloneMa = as.matrix(read.table("../data/clones/xestoClones3.ibsMat")) # reads in IBS matrix produced by ANGSD 

dimnames(cloneMa) = list(cloneBams[,1],cloneBams[,1])
clonesHc = hclust(as.dist(cloneMa), "ave")

cloneSites = cloneBams$region
cloneDepth = cloneBams$depthZone

cloneDend = cloneMa %>% as.dist() %>% hclust(., "ave") %>% as.dendrogram()
cloneDData = cloneDend %>% dendro_data()

# Making the branches hang shorter so we can easily see clonal groups
cloneDData$segments$yend2 = cloneDData$segments$yend
for(i in 1:nrow(cloneDData$segments)) {
  if (cloneDData$segments$yend2[i] == 0) {
    cloneDData$segments$yend2[i] = (cloneDData$segments$y[i] - 0.01)}}

cloneDendPoints = cloneDData$labels
cloneDendPoints$site = cloneSites[order.dendrogram(cloneDend)]
cloneDendPoints$depth=cloneDepth[order.dendrogram(cloneDend)]
rownames(cloneDendPoints) = cloneDendPoints$label

# Making points at the leaves to place symbols for populations
point = as.vector(NA)
for(i in 1:nrow(cloneDData$segments)) {
  if (cloneDData$segments$yend[i] == 0) {
    point[i] = cloneDData$segments$y[i] - 0.01
  } else {
    point[i] = NA}}

cloneDendPoints$y = point[!is.na(point)]

techReps = c("FKX014.1","FKX014.2", "FKX014.3", "FKX029.1", "FKX029.2", "FKX121.1", "FKX121.2", "FKX121.3", "FKX159.1", "FKX159.2", "FKX190.1", "FKX190.2", "FKX190.3", "SFX012.1", "SFX012.2", "SFX012.3", "SFX071.1", "SFX071.2", "SFX071.3", "SFX108.1", "SFX108.2", "SFX108.3")

cloneDendPoints$depth = factor(cloneDendPoints$depth)
cloneDendPoints$depth = factor(cloneDendPoints$depth,levels(cloneDendPoints$depth)[c(2,1)])

cloneDendPoints$site = factor(cloneDendPoints$site)
cloneDendPoints$site = factor(cloneDendPoints$site,levels(cloneDendPoints$site)[c(3, 8, 1, 2, 7, 4, 6, 5)])

cloneDendA = ggplot() +
  geom_segment(data = segment(cloneDData), aes(x = x, y = y, xend = xend, yend = yend2), linewidth = 0.5) +
  geom_point(data = cloneDendPoints, aes(x = x, y = y, fill = site, shape = depth), size = 4, stroke = 0.25) +
  #scale_fill_brewer(palette = "Dark2", name = "Population") +
  scale_fill_manual(values = flPal, name= "Population")+
  scale_shape_manual(values = c(24, 25), name = "Depth Zone")+
  geom_hline(yintercept = 0.145, color = pink, lty = 5, linewidth = 0.75) + # creating a dashed line to indicate a clonal distance threshold
  geom_text(data = subset(cloneDendPoints, subset = label %in% techReps), aes(x = x, y = (y - .015), label = label), angle = 90) + # spacing technical replicates further from leaf
  geom_text(data = subset(cloneDendPoints, subset = !label %in% techReps), aes(x = x, y = (y - .010), label = label), angle = 90) +
  labs(y = "Genetic distance (1 - IBS)") +
  guides(fill = guide_legend(override.aes = list(shape = 22)))+
  theme_classic()

cloneDend = cloneDendA + theme(
  axis.title.x = element_blank(),
  axis.text.x = element_blank(),
  axis.line.x = element_blank(),
  axis.ticks.x = element_blank(),
  axis.title.y = element_text(size = 12, color = "black", angle = 90),
  axis.text.y = element_text(size = 10, color = "black"),
  axis.line.y = element_line(),
  axis.ticks.y = element_line(),
  panel.grid = element_blank(),
  panel.border = element_blank(),
  panel.background = element_blank(),
  plot.background = element_blank(),
  legend.key = element_blank(),
  legend.title = element_text(size = 12),
  legend.text = element_text(size = 10),
  legend.position = "bottom")

# cloneDend

ggsave("../figures/rmd/cloneDend.png", plot = cloneDend, height = 8, width = 40, units = "in", dpi = 300)


We removed the technical replicates/clones and re-ran ANGSD for all the following pop-gen analyses.

Structure and dendrograms

bams = read.csv("../data/flXestoMetaData.csv")[-c(7, 15, 16, 32, 125, 126, 165, 197, 198, 253, 254, 314, 315, 353, 354),] # list of bams files and their populations (tech reps removed)
bams$sample <- gsub("\\.[1-3]*$", "", bams$sample)

bams$region = factor(bams$region)
bams$region = factor(bams$region, levels = levels(bams$region)[c(3, 8, 1, 2, 7, 4, 6, 5)])

bams$depthZone = factor(bams$depthZone)
bams$depthZone = factor(bams$depthZone, levels = levels(bams$depthZone)[c(2,1)])

# ma = as.matrix(read.table("../data/ftl/xestoFTL.ibsMat"))
ma = as.matrix(read.table("../data/xestoNoClones.ibsMat")) # reads in IBS matrix produced by ANGSD


dimnames(ma) = list(bams[,3],bams[,3])


## admixture K = 2
#-------------------------------------
K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7) 
K2ad %>% summarise(sum(V6),sum(V7))
##    sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K2ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7")
admixK2_melt = melt(admixK2, id = c("sample", "site", "depth", "depthm"))

admixK2$site.x = 1

## admixture K = 3
#-------------------------------------
K3ad = read.table("../data/admix/noClones/K3.output") %>% dplyr::select(V6, V7, V8) 
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
##    sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K3ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8") %>%
  dplyr::select(order(colnames(.)))
admixK3_melt = melt(admixK3, id = c("sample", "site", "depth", "depthm"))


## ngsAdmix K = 4
#-------------------------------------
K4ad = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9) 
K4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
##    sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
admixK4 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K4ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8", "Xm1.3" = "V9") %>%
  dplyr::select(order(colnames(.)))
admixK4_melt = melt(admixK4, id = c("sample", "site", "depth", "depthm"))


## admixture K = 5
#-------------------------------------
K5ad = read.table("../data/admix/noClones/K5.output") %>% dplyr::select(V6, V7, V8, V9, V10)
K5ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9), sum(V10))
##    sum(V6) sum(V7) sum(V8) sum(V9) sum(V10)
## 1 103.7482 37.3177 71.0979 57.7997  81.0367
admixK5 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K5ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm1.2" = "V8", "Xm1.3" = "V9", "Xm1.4" = "V10") #%>%
admixK5_melt = melt(admixK5, id = c("sample", "site", "depth", "depthm"))

## construct figure
#-------------------------------------
# ggtree(tr) + geom_nodelab(aes(label = node), hjust = -0.5) 

{
  tr = hclust(dist(ma),"ave") 

  p1 = ggtree(tr, layout = "rectangular", size = 0.35) +  
    geom_point2(aes(subset = (node == 354)), shape = 21, size = 4.5, fill = xestoKColPal[2]) +
    geom_point2(aes(subset = (node == 357)), shape = 21, size = 4.5, fill=xestoKColPal[6]) +
    geom_point2(aes(subset = (node == 358)), shape = 21, size = 4.5, fill = xestoKColPal[1]) +
    geom_cladelabel(node = 354, hjust = 13.35, vjust = -0.1, label = "Xm2", barsize = 0) +
    geom_cladelabel(node = 357, hjust = 3.87, vjust = 1.2, label = "Admixed", barsize = 0) +
    geom_cladelabel(node = 358, hjust = 7.8, vjust = 17.7, label = "Xm1", barsize = 0)
  
  p2 = p1 + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = site), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 0.3) + 
    scale_fill_manual("Site", values = flPal) +
    new_scale_fill()

  p3 = p2 + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depth), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 1.54) + 
    scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF")) +
    new_scale_fill()
  
  p4 = p3  + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depthm), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.02, offset = 4.02) + 
  scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse") +
    new_scale_fill()
  
  p5 = p4 + geom_fruit(data = admixK2_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable), 
                 stat='identity', color = "grey25", pwidth = 0.2, offset = 9.0, linewidth = 0.1) +
     scale_fill_manual("Lineage",values = xestoKColPal[1:5])
  
  p6 = p5 + geom_fruit(data = admixK3_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable), 
                 stat='identity', color = "grey25", pwidth = 0.12, 
                 offset = 18.515, linewidth = 0.1) 
 
  p7 = p6 + geom_fruit(data = admixK4_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable), 
                 stat='identity', color = "grey25", pwidth = 0.12, 
                 offset = 37.757, linewidth = 0.1) 
 
  p8 = p7 + geom_fruit(data = admixK5_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable), 
                 stat='identity', color = "grey25", pwidth = 0.12, 
                 offset = 76.241, linewidth = 0.1) +
  coord_cartesian(xlim = c(-1.13, 0.72)) +
  theme_tree(legend.position = c(0.96, 0.5))
}

Population structure

PCA

cov = read.table("../data/pcangsd/flXestoPcangsd.cov") %>% as.matrix()


pcAdmix = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9)
pcAdmix %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
##    sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
pcAdmix = pcAdmix %>% dplyr::rename("cluster1" = "V6", "cluster2" = "V7", "cluster3" = "V8", "cluster4" = "V9") %>% dplyr::select(order(colnames(.)))
  
pcaEig = eigen(cov)
xestoPcaVar = pcaEig$values/sum(pcaEig$values)*100
head(xestoPcaVar)
## [1] 5.839551 2.713174 2.008476 1.797844 1.294318 1.241920
pcangsd = bams %>%  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% cbind(pcAdmix)

pcangsd$sitedepth = as.factor(paste(pcangsd$site, pcangsd$depth, sep = " "))

pcangsd$sitedepth = factor(pcangsd$sitedepth)
pcangsd$sitedepth = factor(pcangsd$sitedepth, levels(pcangsd$sitedepth)[c(3, 12, 1, 2, 11, 10, 5, 4, 9, 8, 7, 6)])

pcangsd$PC1 = pcaEig$vectors[,1]
pcangsd$PC2 = pcaEig$vectors[,2]
pcangsd$PC3 = pcaEig$vectors[,3]
pcangsd$PC4 = pcaEig$vectors[,4]

pcangsdClustK2 = admixK2 %>% mutate(clusterK2 = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm2 >= 0.75, "Xm2", "Admixed")))
pcangsdClustK2$clusterK2 = as.factor(pcangsdClustK2$clusterK2)
levels(pcangsdClustK2$clusterK2)
## [1] "Admixed" "Xm1"     "Xm2"
pcangsdClustK2$clusterK2 = factor(pcangsdClustK2$clusterK2, levels = levels(pcangsdClustK2$clusterK2)[c(2,3,1)])


pcangsdClustK4 = admixK4 %>% mutate(clusterK4 = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm2 >= 0.75, "Xm2", ifelse(Xm1.2 >= 0.75, "Xm1.2", ifelse(Xm1.3 >= 0.75, "Xm1.3", "Admixed")))))
pcangsdClustK4$clusterK4 = as.factor(pcangsdClustK4$clusterK4)
levels(pcangsdClustK4$clusterK4)
## [1] "Admixed" "Xm1"     "Xm1.2"   "Xm1.3"   "Xm2"
pcangsdClustK4$clusterK4 = factor(pcangsdClustK4$clusterK4, levels = levels(pcangsdClustK4$clusterK4)[c(2,5,3,4,1)])


pcangsd = pcangsd %>% mutate(clusterK2 = pcangsdClustK2$clusterK2, clusterK4 = pcangsdClustK4$clusterK4)

# bamsClusters = pcangsd %>% dplyr::select(sample, cluster) %>% dplyr::arrange(sample) 
# bamsSamples = read.delim("../data/snps/bamsNoClones", header = FALSE)
# bamsClusters$sample = bamsSamples$V1

# bamsClusters = as.data.frame(bamsClusters)

# write.table(x = bamsClusters, file = "../data/snps/bamsClusters", sep = "\t", row.names = FALSE, col.names = FALSE, quote = FALSE)

pcangsd = merge(pcangsd, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, pcangsd, mean), by="sitedepth")
# 
# set.seed(942)
# 
# pcPerm = adonis2(pcangsd[6:9] ~ site*depth*clusterK2, data = pcangsd, permutations = 999, method = "euclidean")
# 
# pcPerm
pcangsd %>% group_by(clusterK4) %>% dplyr::summarize(n = n(),  prop = n*0.75)
## # A tibble: 5 × 3
##   clusterK4     n  prop
##   <fct>     <int> <dbl>
## 1 Xm1          59  44.2
## 2 Xm2          19  14.2
## 3 Xm1.2        17  12.8
## 4 Xm1.3        12   9  
## 5 Admixed     244 183
pcangsd %>% group_by(depth,clusterK4) %>% dplyr::summarize(n = n())
## `summarise()` has grouped output by 'depth'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 3
## # Groups:   depth [2]
##   depth      clusterK4     n
##   <fct>      <fct>     <int>
## 1 Shallow    Xm1          55
## 2 Shallow    Xm2          19
## 3 Shallow    Xm1.2        15
## 4 Shallow    Xm1.3         3
## 5 Shallow    Admixed     148
## 6 Mesophotic Xm1           4
## 7 Mesophotic Xm1.2         2
## 8 Mesophotic Xm1.3         9
## 9 Mesophotic Admixed      96

Plot PCA

#

pcaPlotSA = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
  geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = site, shape = depth), color = "black", stroke = 0.25, size = 4, alpha = 0.5, show.legend = FALSE) +
  geom_point(data = pcangsd, aes(x = mean.x, y = mean.y, fill = site, shape = depth), color = "black", size = 5, alpha = 1, stroke = 0.5) +
  scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
  scale_fill_manual(values = flPal, name = "Site") +
  scale_color_manual(values = flPal, name = "Site") +
  labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
  guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = flPal, alpha = NA), order = 1, ncol = 1)) +
  theme_bw()

pcaPlotS = pcaPlotSA +
  pcaTheme +
  theme(legend.position = c(0.15, 0.24))


pcaPlotK4A = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
  geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = clusterK4, shape = depth), color = "black", size = 4, alpha = 1, show.legend = TRUE) +
  scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
  scale_fill_manual(values = c(xestoKColPal[c(1:4,6)]), name = "Lineage") +
  labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
  guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = c(xestoKColPal[c(1:4,6)]), alpha = NA), order = 1, ncol = 1))+
  theme_bw()

pcaPlotK4 = pcaPlotK4A +
  pcaTheme +
  theme(legend.position = c(0.12, 0.15))


pcaPlotK2A = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
  geom_point(data = pcangsd, aes(x = PC1, y = PC2, fill = clusterK2, shape = depth), color = "black", size = 4, show.legend = TRUE) +
  scale_shape_manual(values = c(21, 23), name = "Depth Zone", ) +
  scale_fill_manual(values = c(xestoKColPal[c(1,2,6)]), name = "Lineage") +
  labs(x = paste0("PC 1 (", format(round(xestoPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVar[2], 1), nsmall = 1), " %)")) +
  guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = xestoKColPal[c(1,2,6)], alpha = NA), order = 1, ncol = 1)) +
  theme_bw()

pcaPlotK2 = pcaPlotK2A +
  pcaTheme +
  theme(legend.position = c(0.12, 0.15))

pcaPlotK2

Put all plots together

pcaPlots = (pcaPlotS | pcaPlotK2 | pcaPlotK4) +
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 18),
        panel.background = element_rect(fill = "white"),
        legend.spacing = unit(-5, "pt"),
        legend.key = element_blank(),
        legend.background = element_blank())

pcaPlots

fig2 = (p8 / pcaPlots) +
  plot_layout(heights = c(1.75, 1)) +
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 20),
        legend.spacing = unit(-5, "pt"),
        legend.key = element_blank(),
        legend.background = element_blank())

ggsave("../figures/figure2.png", plot = fig2, height = 12, width = 14, units = "in", dpi = 300)

ggsave("../figures/figure2.svg", plot = fig2, height = 12, width = 14, units = "in", dpi = 300)

Genetic Diversity

Lineage demographics

K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7) 
K2ad %>% summarise(sum(V6),sum(V7))
##    sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K2ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7")

k2lineage = admixK2 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", "Admixed")))

K3ad = read.table("../data/admix/noCLones/K3.output") %>% dplyr::select(V6, V7, V8) 
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
##    sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K3ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8")

k3lineage = admixK3 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", ifelse(Xm3 >= .75, "Xm3", "Admixed"))))

K4ad = read.table("../data/admix/noClones/K4.output") %>% dplyr::select(V6, V7, V8, V9) 
K4ad %>% summarise(sum(V6),sum(V7), sum(V8), sum(V9))
##    sum(V6) sum(V7) sum(V8) sum(V9)
## 1 162.8505 38.5994 81.0511 68.4982
admixK4 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K4ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8", "Xm4" = "V9")

k4lineage = admixK4 %>% mutate(cluster = ifelse(Xm1 >= .75, "Xm1", ifelse(Xm2 >= .75, "Xm2", ifelse(Xm3 >= .75, "Xm3", ifelse(Xm4 >= .75, "Xm4","Admixed")))))


k2Bams = k2lineage %>% select(sample, cluster)
k2Bams$sample = gsub("FK", "fk_", k2Bams$sample)
k2Bams$sample = gsub("SF", "sf_", k2Bams$sample)
k2Bams$sample = paste(k2Bams$sample, ".trim.bt2.bam", sep ="")

write_delim(k2Bams, "../data/k2bams", col_names = FALSE)

k3Bams = k3lineage %>% select(sample, cluster)
k3Bams$sample = gsub("FK", "fk_", k3Bams$sample)
k3Bams$sample = gsub("SF", "sf_", k3Bams$sample)
k3Bams$sample = paste(k3Bams$sample, ".trim.bt2.bam", sep ="")

write_delim(k3Bams, "../data/k3bams", col_names = FALSE)

k4Bams = k4lineage %>% select(sample, cluster)
k4Bams$sample = gsub("FK", "fk_", k4Bams$sample)
k4Bams$sample = gsub("SF", "sf_", k4Bams$sample)
k4Bams$sample = paste(k4Bams$sample, ".trim.bt2.bam", sep ="")

write_delim(k4Bams, "../data/k4bams", col_names = FALSE)

k2lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 3 × 3
##   cluster     n  perc
##   <chr>   <int> <dbl>
## 1 Admixed    40  30  
## 2 Xm1       292 219  
## 3 Xm2        19  14.2
k3lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 4 × 3
##   cluster     n  perc
##   <chr>   <int> <dbl>
## 1 Admixed   162 122. 
## 2 Xm1       138 104. 
## 3 Xm2        20  15  
## 4 Xm3        31  23.2
k4lineage %>% group_by(cluster) %>% summarise(n = n(), perc = n*0.75)
## # A tibble: 5 × 3
##   cluster     n  perc
##   <chr>   <int> <dbl>
## 1 Admixed   244 183  
## 2 Xm1        59  44.2
## 3 Xm2        19  14.2
## 4 Xm3        17  12.8
## 5 Xm4        12   9
hetK2Data = read.delim("../data/het/flXestoK2Het", header = FALSE, sep = " ") %>% rename("k2Het" = "V2") %>% mutate(sample = bams$sample) %>% left_join(pcangsdClustK2) %>% select(sample, k2Het, clusterK2)
## Joining with `by = join_by(sample)`
hetK4Data = read.delim("../data/het/flXestoK4Het", header = FALSE, sep = " ") %>% rename("k4Het" = "V2")%>% mutate(sample = bams$sample) %>% left_join(pcangsdClustK4) %>% select(sample, k4Het, clusterK4)
## Joining with `by = join_by(sample)`
xestoHet = bams %>% left_join(hetK2Data) %>% left_join(hetK4Data)
## Joining with `by = join_by(sample)`
## Joining with `by = join_by(sample)`
hetK2AOV = welch_anova_test(xestoHet, k2Het ~ clusterK2)

hetK2PH = games_howell_test(xestoHet, k2Het ~ clusterK2)

hetK2PH$p.adj
## [1] 0.00000034800000 0.00000000000549 0.00003810000000
hetK2Letters = data.frame(x = factor(c("Xm1", "Xm2", "Admixed")), y = c(0.0025, 0.0025, 0.0025),  grp = c("a", "b", "c"))


hetK4AOV = welch_anova_test(xestoHet, k4Het ~ clusterK4) 
hetK4PH = games_howell_test(xestoHet, k4Het ~ clusterK4) 

hetK4PH$p.adj
##  [1] 0.00000084200 0.00006550000 0.07400000000 0.00000000782 0.00001210000 0.00000310000
##  [7] 0.00000596000 0.35100000000 0.41500000000 0.93400000000
hetK4Letters = data.frame(x = factor(c("Xm1", "Xm2", "Xm1.2", "Xm1.3","Admixed")), y = c(0.0025, 0.0025, 0.0025, 0.0025, 0.0025),  grp = c("a", "b", "c", "ac", "cd"))
hetPlotK2 = ggplot(data = xestoHet, aes(x = clusterK2, y = k2Het)) +
  geom_violin(aes(fill = clusterK2, group = clusterK2), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
  geom_boxplot(aes(fill = clusterK2, group = clusterK2), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) +  xlab("Lineage") +
  geom_text(data = hetK2Letters, aes(x = x, y = y, label = grp), size = 4) +
   annotate(geom = "text", x = 2.95, y = 0, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", hetK2AOV$statistic), size = 3, parse = TRUE) +
  scale_fill_discrete(type = xestoKColPal[c(1,2,6)], name = "Lineage") +
  xlab("Lineage") +
  ylab("Heterozygosity") +
  scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
  coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15), ylim = c(0, 0.0025)) +
  theme_bw() + 
  violinTheme

hetPlotK2

hetPlotK4 = ggplot(data = xestoHet, aes(x = clusterK4, y = k4Het)) +
  geom_violin(aes(fill = clusterK4, group = clusterK4), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
  geom_boxplot(aes(fill = clusterK4, group = clusterK4), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) +  xlab("Lineage") +
  geom_text(data = hetK4Letters, aes(x = x, y = y, label = grp), size = 4) +
   annotate(geom = "text", x = 4.5, y = 0, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", hetK4AOV$statistic), size = 3, parse = TRUE) +
  scale_fill_discrete(type = xestoKColPal[c(1,2,3,4,6)], name = "Lineage") +
  xlab("Lineage") +
  ylab("Heterozygosity") +
  scale_y_continuous(breaks = seq(0, 0.0025, 0.0005)) +
  coord_cartesian(expand = TRUE, xlim = c(0.85, 5.15)) +
  theme_bw() +
  violinTheme

hetPlotK4

pop.order = c("Xm1.3", "Xm1.2", "Xm2", "Xm1")

# reads in fst 
fstMa1 <- read.delim("../data/xestoFstK4.out") %>% dplyr::select(-fst) %>% df_to_pw_mat(., "pop1", "pop2", "weightedFst")

fstMa1
##            Xm1      Xm2    Xm1.2    Xm1.3
## Xm1   0.000000 0.136042 0.033682 0.020161
## Xm2   0.136042 0.000000 0.113342 0.119521
## Xm1.2 0.033682 0.113342 0.000000 0.036817
## Xm1.3 0.020161 0.119521 0.036817 0.000000
fstMa = fstMa1

upperTriangle(fstMa, byrow = TRUE) <- lowerTriangle(fstMa)
fstMa <- fstMa[,pop.order] %>%
  .[pop.order,]
fstMa[upper.tri(fstMa)] <- NA
fstMa <- as.data.frame(fstMa)

# rearrange and reformat matrix
fstMa$Pop = factor(row.names(fstMa), levels = unique(pop.order))


# melt matrix data (turn from matrix into long dataframe)
fst = melt(fstMa, id.vars = "Pop", value.name = "Fst", variable.name = "Pop2", na.rm = FALSE)

fst$Fst = round(fst$Fst, 3)


fstHeatmap = ggplot(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, fill = as.numeric(as.character(Fst)))) +
  geom_tile(color = "white") +
  geom_segment(data = fst, aes(x = 0.475, xend = 0.15, y = Pop2, yend = Pop2, color = Pop2), size = 20) + #colored boxes for Y-axis labels
  geom_segment(data = fst, aes(x = Pop, xend = Pop, y = 0.2, yend = 0.475, color = Pop), size = 22.5) + #colored boxes for X-axis labels
  scale_color_manual(values = rev(xestoKColPal[c(1:4)]), guide = NULL) +
  scale_fill_gradient(low = "white", high = "gray40", limit = c(0, 0.22), space = "Lab", name = expression(paste(italic("F")[ST])), na.value = NA,  guide = "colourbar") +
  geom_text(data = fst %>% filter(Fst != 0), aes(Pop, Pop2, label = Fst), color = "black", size = 3.5) +
  guides(fill = guide_colorbar(barwidth = 7.5, barheight = 0.75, title.position = "top", title.hjust = 0.5, direction = "horizontal", ticks.colour = "black", frame.colour = "black")) +
  scale_y_discrete(position = "left", limits = levels(fst$Pop2)) +
  scale_x_discrete(limits = rev(levels(fst$Pop2)[c(1:4)])) +
  coord_cartesian(xlim = c(1, 4), ylim = c(1, 4), clip = "off") +
  theme_minimal() +
  fstTheme

fstHeatmap

Nucleotide diversity

npgList = list(read_tsv("../data/k2Xm1.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=2", lineage = "Xm1"),
               read_tsv("../data/k2Xm2.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=2", lineage = "Xm2"),
               read_tsv("../data/k4Xm1.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm1"),
               read_tsv("../data/k4Xm2.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm2"),
               read_tsv("../data/k4Xm3.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm3"),
               read_tsv("../data/k4Xm4.thetas.idx.pestPG", show_col_types = FALSE) %>% mutate(K = "*K*=4", lineage = "Xm4"))


piAll = purrr::reduce(npgList, rbind) %>% 
  dplyr::group_by(K, lineage) %>%
  filter(tP != 0) %>%
  mutate(tPps = tP/nSites) %>%
  dplyr::summarize(tPps = mean(tPps))
## `summarise()` has grouped output by 'K'. You can override using the `.groups` argument.
piAll$Ne = (piAll$tPps)/(4*2e-8)
piAll
## # A tibble: 6 × 4
## # Groups:   K [2]
##   K     lineage    tPps     Ne
##   <chr> <chr>     <dbl>  <dbl>
## 1 *K*=2 Xm1     0.00407 50861.
## 2 *K*=2 Xm2     0.00540 67442.
## 3 *K*=4 Xm1     0.00438 54742.
## 4 *K*=4 Xm2     0.00659 82387.
## 5 *K*=4 Xm3     0.00467 58378.
## 6 *K*=4 Xm4     0.00434 54191.
piAll$K = factor(piAll$K)
piAll$lineage = factor(piAll$lineage)
nuclDivPlot = ggplot(piAll, aes(x = K, y = tPps, fill = lineage, group - lineage)) +
  geom_rect(aes(xmin = 1.385, xmax = 10, ymin = -1, ymax = 100000), fill = "black", color = NA, alpha = 0.03, linewidth = 0) +
  geom_bar(position = position_dodge2(preserve = "single"), stat = "identity", color = "black") +
  scale_fill_manual(values = xestoKColPal) +
  geom_text(position = position_dodge2(preserve = "single", width = 0.9), aes(y = tPps-.0015, label= scales::comma(round(Ne,0)), hjust = 0, fontface = "bold"), angle = 90, color = "white", ) +
  labs(x = bquote(~italic(K)), y = bquote("Nucleotide diversity ("*pi*")")) + 
  coord_cartesian(xlim = c(0.4, 2.8), ylim = c(0, 0.007), expand = FALSE) +
  theme_bw() +
  violinTheme +
  theme(axis.text.x = ggtext::element_markdown())
  
nuclDivPlot

Heterozygosity

hetPlots = hetPlotK2 + hetPlotK4 + nuclDivPlot + fstHeatmap +
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 14),
        legend.text = element_text(size = 8),
        legend.title = element_text(size = 10))
  
hetPlots

ggsave("../figures/figure5.png", plot = hetPlots, width = 8, height = 7, units = "in", dpi = 300)

Size class structure

xmSizeData = read.csv("../data/xestoSEFLSampleData.csv") %>% mutate("sample" = tubeID, "site" = region) %>% dplyr::select(-c("region", "species", "sampleID", "tubeID", "depthFt")) %>%  dplyr::relocate(sample, .before = latDD ) %>% dplyr::relocate(site, .before = latDD ) 

xmSize = xmSizeData %>% left_join(pcangsd) %>% left_join(admixK2) %>% dplyr::select(sample, site, depthm, latDD, Xm1, Xm2, PC1, PC2, PC3, clusterK2, clusterK4, h, od, bd) %>%  dplyr::mutate(volume = ((1/12)*pi*h*(od^2 + od * bd + bd^2))) %>% mutate(sizeClass = case_when(volume <= 143.13 ~ "I",
      volume <= 1077.13 ~ "II",
      volume <= 5666.32 ~ "III",
      volume <= 17383.97 ~ "IV",
      volume > 17383.97 ~ "V"))
## Joining with `by = join_by(sample, site)`
## Joining with `by = join_by(sample, site, depth, depthm)`
xmSize$sizeClass <- ordered(xmSize$sizeClass, levels = c("I", "II", "III", "IV", "V"))

xmSize$site = factor(xmSize$site)
xmSize$site = factor(xmSize$site, levels = levels(xmSize$site)[c(3,4,1,2)])

volumePlot = ggplot(data = xmSize, aes(x = log(volume))) +
  # geom_density()+
  geom_histogram(bins=5)+
  scale_fill_manual(values = xestoKColPal[c(1,2,6)], name = "Lineage") +
  # scale_x_discrete(drop = FALSE)+
  xlab("log(Volume)") +
  ylab("Density") +
  
  theme_bw() +
  violinTheme + 
  theme(legend.position = "right")

volumePlot

# By Lineage
xmTableLineage = table(xmSize$sizeClass, xmSize$clusterK2)

fisher.test(xmTableLineage, simulate.p.value = TRUE, B = 100000)
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on 100000
##  replicates)
## 
## data:  xmTableLineage
## p-value = 0.00001
## alternative hypothesis: two.sided
## Proportions per lineage
prop_lineage <- prop.table(xmTableLineage, margin = 2)
round(prop_lineage, 3)
##      
##         Xm1   Xm2 Admixed
##   I   0.000 0.000   0.000
##   II  0.022 0.316   0.250
##   III 0.157 0.579   0.250
##   IV  0.180 0.053   0.250
##   V   0.640 0.053   0.250
## Expected counts under independence
row_totals <- rowSums(xmTableLineage)
col_totals <- colSums(xmTableLineage)
grand_total <- sum(xmTableLineage)

expected <- outer(row_totals, col_totals) / grand_total
round(expected, 2)
##       Xm1  Xm2 Admixed
## I    0.00 0.00     0.0
## II   8.16 1.74     1.1
## III 20.77 4.43     2.8
## IV  14.83 3.17     2.0
## V   45.24 9.66     6.1
## Difference between observed and expected
### Neg under-representation, Pos over-

diffLineage <- xmTableLineage - expected
diffLineage
##      
##             Xm1       Xm2   Admixed
##   I    0.000000  0.000000  0.000000
##   II  -6.158333  4.258333  1.900000
##   III -6.766667  6.566667  0.200000
##   IV   1.166667 -2.166667  1.000000
##   V   11.758333 -8.658333 -3.100000
# By Site
xmTableSite = table(xmSize$sizeClass, xmSize$site)

fisher.test(xmTableSite, simulate.p.value = TRUE, B = 100000)
## 
##  Fisher's Exact Test for Count Data with simulated p-value (based on 100000
##  replicates)
## 
## data:  xmTableSite
## p-value = 0.00001
## alternative hypothesis: two.sided
## Proportions per site
prop_lineage <- prop.table(xmTableSite, margin = 2)
round(prop_lineage, 3)
##      
##       Jupiter West Palm Boynton Ft. Lauderdale
##   I     0.000     0.000   0.000          0.000
##   II    0.167     0.000   0.000          0.200
##   III   0.300     0.033   0.067          0.533
##   IV    0.233     0.033   0.233          0.167
##   V     0.300     0.933   0.700          0.100
# Expected counts under independence
row_totals <- rowSums(xmTableSite)
col_totals <- colSums(xmTableSite)
grand_total <- sum(xmTableSite)

expected <- outer(row_totals, col_totals) / grand_total
round(expected, 2)
##     Jupiter West Palm Boynton Ft. Lauderdale
## I      0.00      0.00    0.00           0.00
## II     2.75      2.75    2.75           2.75
## III    7.00      7.00    7.00           7.00
## IV     5.00      5.00    5.00           5.00
## V     15.25     15.25   15.25          15.25
#Difference between observed and expected
##Neg under-representation, Pos over-

diffSite <- xmTableSite - expected
diffSite
##      
##       Jupiter West Palm Boynton Ft. Lauderdale
##   I      0.00      0.00    0.00           0.00
##   II     2.25     -2.75   -2.75           3.25
##   III    2.00     -6.00   -5.00           9.00
##   IV     2.00     -4.00    2.00           0.00
##   V     -6.25     12.75    5.75         -12.25
xmLm = lmer(log(volume) ~ Xm1 + (1 | site), data = xmSize)
# xmLm = lmer(volume ~ Xm1 + (1 | latDD), data = xmSize)

qqnorm(residuals(xmLm))
qqline(residuals(xmLm))

xmLm2 = lmer(log(volume) ~ Xm1 + depthm +  (1 | site), data = xmSize)
qqnorm(residuals(xmLm2))
qqline(residuals(xmLm2))

check_collinearity(xmLm2)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##    Term  VIF    VIF 95% CI adj. VIF Tolerance Tolerance 95% CI
##     Xm1 1.02 [1.00, 35.94]     1.01      0.98     [0.03, 1.00]
##  depthm 1.02 [1.00, 35.94]     1.01      0.98     [0.03, 1.00]
anova(xmLm, xmLm2)
## refitting model(s) with ML (instead of REML)
## Data: xmSize
## Models:
## xmLm: log(volume) ~ Xm1 + (1 | site)
## xmLm2: log(volume) ~ Xm1 + depthm + (1 | site)
##       npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## xmLm     4 396.01 407.16 -194.00    388.01                     
## xmLm2    5 396.42 410.35 -193.21    386.42 1.5949  1     0.2066
summary(xmLm)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(volume) ~ Xm1 + (1 | site)
##    Data: xmSize
## 
## REML criterion at convergence: 387.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.91177 -0.72401  0.00932  0.67260  2.53704 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  site     (Intercept) 0.8318   0.912   
##  Residual             1.3706   1.171   
## Number of obs: 120, groups:  site, 4
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   8.0097     0.5795  13.823
## Xm1           1.9081     0.4399   4.337
## 
## Correlation of Fixed Effects:
##     (Intr)
## Xm1 -0.589
#effect size, converted from log
exp(fixef(xmLm)["Xm1"])
##      Xm1 
## 6.739935
coef <- fixef(xmLm)["Xm1"]
fc <- exp(coef)
ci <- exp(confint(xmLm, parm = "Xm1", method = "Wald"))

# R² values
r2vals <- r.squaredGLMM(xmLm)

# Semi-partial R²
r2beta_vals <- r2beta(xmLm, method = "nsj")

report <- data.frame(
  Term = "Xm1 (Xm2 vs Xm1)",
  Estimate = round(coef, 3),
  SE = round(summary(xmLm)$coefficients["Xm1", "Std. Error"], 3),
  t_value = round(summary(xmLm)$coefficients["Xm1", "t value"], 3),
  FoldChange = round(fc, 2),
  CI_lower = round(ci[1], 2),
  CI_upper = round(ci[2], 2),
  R2_marginal = round(r2vals[1], 2),
  R2_conditional = round(r2vals[2], 2),
  R2_partial = round(r2beta_vals$Rsq[2], 2)
)

print(report)
##                 Term Estimate   SE t_value FoldChange CI_lower CI_upper R2_marginal
## Xm1 Xm1 (Xm2 vs Xm1)    1.908 0.44   4.337       6.74     2.85    15.96        0.18
##     R2_conditional R2_partial
## Xm1           0.49       0.18
# Random effects
ranef_summary <- as.data.frame(VarCorr(xmLm))
print(ranef_summary)
##        grp        var1 var2      vcov     sdcor
## 1     site (Intercept) <NA> 0.8317721 0.9120154
## 2 Residual        <NA> <NA> 1.3705975 1.1707252
xmSizePred <- data.frame(
  Xm1 = seq(min(xmSize$Xm1), max(xmSize$Xm1), length.out = 100)
)

# Marginal predictions (fixed effects only)
xmSizePred$pred_marginal <- predict(xmLm, newdata = xmSizePred, re.form = NA)

# Conditional predictions (fixed + random effects)
# This requires making predictions for each site
xmSize$pred_conditional <- predict(xmLm, newdata = xmSize)


xmList = read_csv("../data/flXestoMetaData.csv")
## Rows: 366 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): sampleID, date, sample, region, site, species, depthZone
## dbl (4): latDD, longDD, depthFt, depthM
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
xmList$sample = gsub("\\.[1-3]*$", "", xmList$sample) 

xmList = xmList %>% select(-site) %>%  dplyr::left_join(xmSize) %>% dplyr::filter(clusterK2 == "Admixed")
## Joining with `by = join_by(sample, latDD)`
xmList
## # A tibble: 12 × 25
##    sampleID      date  sample region species latDD longDD depthFt depthM depthZone site 
##    <chr>         <chr> <chr>  <chr>  <chr>   <dbl>  <dbl>   <dbl>  <dbl> <chr>     <fct>
##  1 02-IX-20-3-0… 9/2/… SFX025 West … Xestos…  26.7  -80.0      51   15.5 Shallow   West…
##  2 10-IX-20-1-0… 9/10… SFX034 Jupit… Xestos…  26.9  -80.0      66   20.1 Shallow   Jupi…
##  3 10-IX-20-2-0… 9/10… SFX042 Jupit… Xestos…  26.9  -80.0      68   20.7 Shallow   Jupi…
##  4 10-IX-20-2-0… 9/10… SFX043 Jupit… Xestos…  26.9  -80.0      71   21.6 Shallow   Jupi…
##  5 10-IX-20-2-0… 9/10… SFX045 Jupit… Xestos…  26.9  -80.0      71   21.6 Shallow   Jupi…
##  6 11-XII-20-1-… 12/1… SFX062 Ft. L… Xestos…  26.2  -80.1      17    5.2 Shallow   Ft. …
##  7 11-XII-20-2-… 12/1… SFX073 Ft. L… Xestos…  26.1  -80.1      21    6.4 Shallow   Ft. …
##  8 11-XII-20-2-… 12/1… SFX080 Ft. L… Xestos…  26.1  -80.1      20    6.1 Shallow   Ft. …
##  9 11-XII-20-3-… 12/1… SFX081 Ft. L… Xestos…  26.1  -80.1      25    7.6 Shallow   Ft. …
## 10 11-XII-20-3-… 12/1… SFX083 Ft. L… Xestos…  26.1  -80.1      23    7   Shallow   Ft. …
## 11 21-I-21-3-004 1/21… SFX114 Boynt… Xestos…  26.5  -80.0      54   16.5 Shallow   Boyn…
## 12 21-I-21-3-009 1/21… SFX119 Boynt… Xestos…  26.5  -80.0      60   18.3 Shallow   Boyn…
## # ℹ 14 more variables: depthm <dbl>, Xm1 <dbl>, Xm2 <dbl>, PC1 <dbl>, PC2 <dbl>,
## #   PC3 <dbl>, clusterK2 <fct>, clusterK4 <fct>, h <dbl>, od <dbl>, bd <dbl>,
## #   volume <dbl>, sizeClass <ord>, pred_conditional <dbl>
xmGLMMplot = ggplot(xmSize, aes(x = Xm1, y = log(volume))) +
  geom_point(aes(color = site), alpha = 0.6, size = 2.5) +
  # Add a line for the overall marginal effect (fixed effects)
  geom_line(data = xmSizePred, aes(x = Xm1, y = pred_marginal),
            linetype = "dashed", color = "black", linewidth = 1, ) +
  # Add a line for the conditional prediction (fixed + random effects) for each site
  geom_line(aes(y = pred_conditional, group = site, 
    color = site), linewidth = 0.8) +
  labs(x = "Admixture proportion (Xm1)",
    y = "Log(Volume)") +
  scale_color_manual(name = "Site", values = flPal)+
  theme_bw() +
  violinTheme

xmGLMMplot

sizeClassK2 = ggplot(data = xmSize, aes(x = sizeClass, fill = clusterK2, group = clusterK2)) +
  # geom_histogram(bins=5)+
  geom_bar(color = "grey25", linewidth= 0.25) +
  scale_fill_manual(values = xestoKColPal[c(1,2,6)], name = "Lineage") +
  scale_x_discrete(drop = FALSE)+
  xlab("Size class") +
  ylab("Count") +
  theme_bw() +
  violinTheme + 
  theme(legend.position = "right")

sizeClassK2

sizeClassK2site = ggplot(data = xmSize, aes(x = sizeClass, fill = site, group = site)) +
  # geom_density(alpha = 0.25, adjust = 1.2) +
   geom_bar(color = "grey25", linewidth= 0.25) +
  scale_fill_manual(values = flPal, name = "Site") +
  scale_x_discrete(drop = FALSE) +
  xlab("Size class") +
  ylab("Count") +
  theme_bw() +
  violinTheme + 
  theme(legend.position = "right")

sizeClassK2site

# xmPics =image_read("../figures/xmPics.png") %>% image_ggplot()

xmSizeClassPlots = (sizeClassK2 + sizeClassK2site)/xmGLMMplot +
  plot_layout(heights = c(1, 1), guides = "collect") +
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 20),
        #legend.spacing = unit(-5, "pt"),
        legend.key = element_blank(),
        legend.background = element_blank())

ggsave("../figures/figure4.png", plot = xmSizeClassPlots, dpi =300, height = 16, width = 20, units = "cm")

Hybridization

K3ad = read.table("../data/admix/noClones/K3.output") %>% dplyr::select(V6, V7, V8) 
K3ad %>% summarise(sum(V6),sum(V7), sum(V8))
##    sum(V6) sum(V7) sum(V8)
## 1 211.2603 43.1265 96.6129
admixK3 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K3ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7", "Xm3" = "V8")

admixK3div = admixK3 %>% mutate(cluster = ifelse(Xm1 >= .97, "Xm1", ifelse(Xm2 >= .97, "Xm2", ifelse(between(Xm1, 0.40, 0.60) & between(Xm2, 0.40, 0.60) , "Hyb", ifelse(between(Xm1, 0.65, 0.85) & between(Xm2, 0.15, 0.35),"Hyb", ifelse(between(Xm2, 0.65, 0.85) & between(Xm1, 0.15, 0.35),"Hyb", "NA")))))) %>% filter(cluster != "NA")

admixK3div %>% group_by(cluster) %>% summarise(n = n())
## # A tibble: 3 × 2
##   cluster     n
##   <chr>   <int>
## 1 Hyb        25
## 2 Xm1        59
## 3 Xm2        13
flXestoAdmixK3div =  admixK3div %>% filter(sample != "FKX006")
# 
# flXestoAdmixK3Melt = melt(flXestoAdmixK3div, id.vars=c("sample", "site", "depth", "depthm"), variable.name="Ancestry", value.name="Fraction")

K2ad = read.table("../data/admix/noClones/K2.output") %>% dplyr::select(V6, V7) 
K2ad %>% summarise(sum(V6),sum(V7))
##    sum(V6) sum(V7)
## 1 301.7527 49.2473
admixK2 = bams %>%  
  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% 
  cbind(K2ad) %>% 
  dplyr::rename("Xm1" = "V6", "Xm2" = "V7")

admixK2div = admixK2 %>% mutate(cluster = ifelse(Xm1 >= 1, "Xm1", ifelse(Xm2 >= 1, "Xm2", ifelse(between(Xm1, 0.40, 0.60) & between(Xm2, 0.40, 0.60) , "Hyb", ifelse(between(Xm1, 0.60, 0.90) & between(Xm2, 0.1, 0.40),"Hyb", ifelse(between(Xm2, 0.60, 0.90) & between(Xm1, 0.1, 0.40),"Hyb", "NA")))))) %>% filter(cluster != "NA")

flXestoAdmixK2 = admixK2div %>% filter(sample %in% flXestoAdmixK3div$sample) %>% arrange(-Xm1)
flXestoAdmixK2$ord = c(1:length(flXestoAdmixK2$sample)) 

flXestoAdmixK2Melt = melt(flXestoAdmixK2, id.vars=c("sample", "site", "depth", "depthm", "cluster", "ord"), variable.name="Lineage", value.name="Fraction")


admixPlotK2Diva = ggplot(data = flXestoAdmixK2Melt, aes(x = ord, y = Fraction, fill = Lineage, order = ord)) +
  geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", linewidth = 0.2) +
  scale_fill_manual(values = xestoKColPal) +
  scale_color_manual(values = rev(flPal)) +
  scale_x_discrete(expand = c(0, 0)) +
  scale_y_continuous(expand = c(0.001, 0.001)) +
  coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()


admixPlotK2Div = admixPlotK2Diva + 
  admixTheme +
  theme(legend.position = "right")

admixPlotK2Div

div_groups = data.frame("sample" = flXestoAdmixK2$sample, "group" = flXestoAdmixK2$cluster) %>% arrange(sample)

div_groups$sample = gsub(pattern = "SF", replacement = "sf_", div_groups$sample)
div_groups$sample = gsub(pattern = "FK", replacement = "fk_", div_groups$sample)
div_groups$sample = paste(div_groups$sample, ".trim.bt2.bam", sep = "")

div_groups$sample
##  [1] "fk_X004.trim.bt2.bam" "fk_X016.trim.bt2.bam" "fk_X043.trim.bt2.bam"
##  [4] "fk_X051.trim.bt2.bam" "fk_X056.trim.bt2.bam" "fk_X059.trim.bt2.bam"
##  [7] "fk_X064.trim.bt2.bam" "fk_X068.trim.bt2.bam" "fk_X087.trim.bt2.bam"
## [10] "fk_X089.trim.bt2.bam" "fk_X100.trim.bt2.bam" "fk_X114.trim.bt2.bam"
## [13] "fk_X119.trim.bt2.bam" "fk_X122.trim.bt2.bam" "fk_X147.trim.bt2.bam"
## [16] "fk_X184.trim.bt2.bam" "fk_X186.trim.bt2.bam" "fk_X188.trim.bt2.bam"
## [19] "fk_X189.trim.bt2.bam" "fk_X205.trim.bt2.bam" "fk_X211.trim.bt2.bam"
## [22] "fk_X215.trim.bt2.bam" "fk_X219.trim.bt2.bam" "fk_X224.trim.bt2.bam"
## [25] "fk_X227.trim.bt2.bam" "fk_X229.trim.bt2.bam" "fk_X230.trim.bt2.bam"
## [28] "sf_X008.trim.bt2.bam" "sf_X012.trim.bt2.bam" "sf_X015.trim.bt2.bam"
## [31] "sf_X023.trim.bt2.bam" "sf_X024.trim.bt2.bam" "sf_X025.trim.bt2.bam"
## [34] "sf_X026.trim.bt2.bam" "sf_X028.trim.bt2.bam" "sf_X033.trim.bt2.bam"
## [37] "sf_X034.trim.bt2.bam" "sf_X036.trim.bt2.bam" "sf_X037.trim.bt2.bam"
## [40] "sf_X039.trim.bt2.bam" "sf_X040.trim.bt2.bam" "sf_X043.trim.bt2.bam"
## [43] "sf_X061.trim.bt2.bam" "sf_X063.trim.bt2.bam" "sf_X064.trim.bt2.bam"
## [46] "sf_X065.trim.bt2.bam" "sf_X066.trim.bt2.bam" "sf_X067.trim.bt2.bam"
## [49] "sf_X068.trim.bt2.bam" "sf_X071.trim.bt2.bam" "sf_X072.trim.bt2.bam"
## [52] "sf_X073.trim.bt2.bam" "sf_X076.trim.bt2.bam" "sf_X077.trim.bt2.bam"
## [55] "sf_X079.trim.bt2.bam" "sf_X080.trim.bt2.bam" "sf_X083.trim.bt2.bam"
## [58] "sf_X084.trim.bt2.bam" "sf_X086.trim.bt2.bam" "sf_X090.trim.bt2.bam"
## [61] "sf_X093.trim.bt2.bam" "sf_X094.trim.bt2.bam" "sf_X103.trim.bt2.bam"
## [64] "sf_X105.trim.bt2.bam" "sf_X107.trim.bt2.bam" "sf_X110.trim.bt2.bam"
## [67] "sf_X112.trim.bt2.bam" "sf_X113.trim.bt2.bam" "sf_X114.trim.bt2.bam"
## [70] "sf_X117.trim.bt2.bam" "sf_X119.trim.bt2.bam"
write_delim(div_groups, "../data/div_groups", col_names = FALSE)

div_groups %>% select("sample") %>% write_delim(., "../data/div_samples", col_names = FALSE)
maDiv = as.matrix(read.table("../data/xestoDiv.ibsMat"))

divSamples = admixK2div %>% filter(sample %in% flXestoAdmixK3div$sample) %>% select(sample)

dimnames(maDiv) = list(divSamples[,1],divSamples[,1])


trdiv = hclust(dist(maDiv),"ave") 

#ggtree(trdiv) + geom_text(aes(label = node), hjust = -.5)


  p1div = ggtree(trdiv, layout = "rectangular") + 
    annotate(geom = "rect", xmin = 0.325, xmax = -.62, ymin = 0.5, ymax = 13.5, fill = xestoKColPal[2], alpha = 0.5, size = .4) +
    annotate(geom = "rect", xmin = 0.325, xmax = -.53, ymin = 13.5, ymax = 37.5, fill = xestoKColPal[6], alpha = 0.5, size = .4) +
    annotate(geom = "rect", xmin = 0.325, xmax = -.39, ymin = 37.5, ymax = 71.5, fill = xestoKColPal[1], alpha = 0.5, size = .4) +
    geom_point2(aes(subset = (node == 73)), shape = 21, size = 4.5, fill=xestoKColPal[6]) +
    geom_point2(aes(subset = (node == 86)), shape = 21, size = 4.5, fill = xestoKColPal[1]) +
    geom_point2(aes(subset = (node == 74)), shape = 21, size = 4.5, fill = xestoKColPal[2]) +
    geom_cladelabel(node = 74, label = "Xm2", barsize = 0, hjust = 8.75) +
    geom_cladelabel(node = 86, label = "Xm1", barsize = 0, hjust = 3.85, vjust = 5.1) +
    geom_cladelabel(node = 73, label = "Admixed", barsize = 0, hjust = 3.9, vjust = 8.6)
    
p2div = p1div + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = site), stat='identity', linewidth = 0.1, color = "grey25", pwidth = 0.05, offset = 0.72) + 
    scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 2, order = 1)) +
    theme(legend.key.size = unit(.8, "line")) +
    new_scale_fill()

p3div = p2div + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depth), stat='identity', linewidth = 0.1, color = "grey25", pwidth = 0.05, offset = 2.26) +
    scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 2, order = 2)) +
    new_scale_fill()
  
p4div = p3div  + geom_fruit(data=admixK2, geom = geom_bar, mapping = aes(x = 1, y = sample, fill = depthm), stat='identity', color = "grey25", linewidth = 0.1, pwidth = 0.05, offset = 5.34) + 
  scale_fill_viridis_c("Depth (m)", option = "mako", trans = "reverse") +
    new_scale_fill()
  
  p5div = p4div + geom_fruit(data = admixK2_melt, geom=geom_bar, mapping = aes(x = -value, y = sample, fill = variable), 
                 stat='identity', color = "grey25", pwidth = 0.45, offset = 11.57, linewidth = 0.1) +
     scale_fill_manual("Lineage",values = xestoKColPal[1:5]) +
    coord_cartesian(xlim = c(-0.7,0.5)) +
    theme_tree(legend.position = c(0.97, 0.5))

# p5div 

  # p2div = facet_plot(p1div, panel = "location", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = site), size = 0.1, color = "grey25") + 
  #   scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 2, order = 1)) + 
  #   theme(legend.key.size = unit(.8, "line")) +
  #   new_scale_fill() 
  # 
  # p3div = facet_plot(p2div, panel = "depth zone", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = depth), size = 0.1, color = "grey25") +
  #   scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 2, order = 2)) +
  #   new_scale_fill()
  # 
  # p4div = facet_plot(p3div, panel = "depth", data=admixK2, geom = geom_tile, mapping = aes(x = 1, fill = depthm), size = 0.1, color = "grey25") + 
  #   scale_fill_viridis_c("Depth (m)", option = "mako", direction = -1, guide = guide_colorbar(order = 2, direction = "horizontal")) +
  #   theme(legend.title.position = "top") +
  #   new_scale_fill()
  # 
  # p5div = facet_plot(p4div, panel="K = 2", data = admixK2_melt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), 
  #               stat ='identity', size = 0.15, color = "grey25") +
  #   scale_fill_manual("Lineage",values = xestoKColPal[1:2], guide = guide_legend(ncol = 2, order = 4)) + 
  #   theme(strip.text = element_blank(),
  #         #legend.direction = "horizontal",
  #         legend.text = element_text(size = 10),
  #         legend.title = element_text(size = 10),
  #         legend.key = element_blank(),
  #         legend.margin = margin(c(0,0,0,0),unit = "pt"))
covDiv = read.table("../data/pcangsd/flXestoDivPcangsd.cov") %>% as.matrix()

pcaEigDiv = eigen(covDiv)
xestoPcaVarDiv = pcaEigDiv$values/sum(pcaEigDiv$values)*100

head(xestoPcaVarDiv)
## [1] 17.668624  4.996007  1.747479  1.600818  1.570501  1.524815
pcangsdDiv = bams %>%  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% filter(sample %in% flXestoAdmixK2$sample) %>% left_join(flXestoAdmixK2)
## Joining with `by = join_by(sample, site, depth, depthm)`
pcangsdDiv$PC1 = pcaEigDiv$vectors[,1]
pcangsdDiv$PC2 = pcaEigDiv$vectors[,2]
pcangsdDiv$PC3 = pcaEigDiv$vectors[,3]
pcangsdDiv$PC4 = pcaEigDiv$vectors[,4]

pcangsdDiv = pcangsdDiv %>% mutate(cluster = ifelse(Xm1 >= 0.98, "Xm1", ifelse(Xm2 >= 0.98, "Xm2", "Admixed")))


pcangsdDiv$cluster = as.factor(pcangsdDiv$cluster)
levels(pcangsdDiv$cluster)
## [1] "Admixed" "Xm1"     "Xm2"
pcangsdDiv$cluster = factor(pcangsdDiv$cluster, levels = levels(pcangsdDiv$cluster)[c(2, 3, 1)]) 

Plot PCA

pcaPlot1Diva = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
  geom_point(data = pcangsdDiv, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", stroke = 0.25, size = 4, show.legend = TRUE) +
  scale_fill_manual(name = "Lineage", values = xestoKColPal[c(1,2,6)]) +
  scale_shape_manual(name = "Depth zone", values = c(21,23)) +
  labs(x = paste0("PC 1 (", format(round(xestoPcaVarDiv[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoPcaVarDiv[2], 1), nsmall = 1), " %)")) +
  guides(shape = guide_legend(override.aes = list(size = 3, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 3, fill = xestoKColPal[c(1,2,6)], alpha = NA), order = 1, ncol = 1)) +
  theme_bw()

pcaPlot1Div = pcaPlot1Diva +
  pcaTheme +
  theme(legend.position = c(0.12, 0.25),
        legend.key.spacing = unit(2, "pt"),
        legend.spacing = unit(0.25, "pt"),
        legend.margin = margin(4,2,4,2, unit = "pt"),
        legend.background = element_blank())

pcaPlot1Div

xestoAF = read.delim(file = "../data/flXestoAlleleFreq.txt")
xestoAF$chrom = paste(xestoAF$chrom, xestoAF$pos, sep = "_")

xm1mjrAlleles = xestoAF %>% filter(majFreq > 0.85, pop == "Xm1") %>% group_by(chrom, pos)

xm2mnrAlleles = xestoAF %>% filter(minFreq > 0.85, pop == "Xm2") %>% group_by(chrom, pos) 

snpList.a = xm2mnrAlleles$chrom

snps.a = xm1mjrAlleles %>% filter(chrom %in% snpList.a) %>% bind_rows(xm2mnrAlleles)


xm2mjrAlleles = xestoAF %>% filter(majFreq > 0.85, pop == "Xm2") %>% group_by(chrom, pos)

xm1mnrAlleles = xestoAF %>% filter(minFreq > 0.85, pop == "Xm1") %>% group_by(chrom, pos) 

snpList.b = xm1mnrAlleles$chrom 

snps.b = xm2mjrAlleles %>% filter(chrom %in% snpList.b) %>% bind_rows(xm1mnrAlleles)

snps = snps.a %>% bind_rows(snps.b) %>% group_by(chrom) %>% summarise(n = n()) %>% filter(n == 2) %>% select(!n)

write_delim(snps, "../data/snpList")

altSnps = snps %>% left_join(xestoAF) %>% select(chrom, majFreq, pop)
## Joining with `by = join_by(chrom)`
altSnps$pop = factor(altSnps$pop)
altSnps$pop = factor(altSnps$pop, levels = levels(altSnps$pop)[c(1,3,2)]) 

levels(altSnps$pop) = c("Xm1","Admixed","Xm2")

altSnpsMelt = melt(altSnps, id.vars = c("chrom", "pop"))


divPlot = ggplot() +
  geom_tile(data = altSnpsMelt, aes(x = pop, y = chrom, fill = value)) +
# ggplot2::geom_text(data = altSnpsMelt, aes(x = pop, y = chrom, label = round(value, 2))) + # scale_fill_gradientn(colors = rev(c("#645A9FFF", "#B696D6FF", "#F6E2FBFF")))
scale_fill_gradientn(name = "Major allele \nfrequency",colors = c("#3FB8AFFF", "#ADCEA9", "#DAD8A7", "#E4AB9B","#FF3D7FFF")) +
  labs(x = "Lineage", y = "SNP locus") +
  theme(axis.text.y = element_blank(),
        axis.text.x = element_text(angle = 0, color = "black", size = 10),
        axis.ticks = element_blank(),
        panel.border = element_blank(),
        panel.background = element_blank(),
        plot.background = element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        legend.key.size = unit(.8, "line"))
  
# ftl.snps.vcf = snps
# ftl.snps.vcf$pos = ftl.snps.vcf$chrom
# ftl.snps.vcf$chrom = gsub("\\_.*", "", ftl.snps.vcf$chrom)
# ftl.snps.vcf$pos = gsub(".*_", "", ftl.snps.vcf$pos)
# 
# # write_delim(ftl.snps.vcf, "../data/ftl/ftlSnpsVcf", col_names = FALSE)

Divergent snps heterozygosities

divHet = read.delim("../data/divHetOut", header = FALSE, sep = " ") %>% rename("het" = "V2") %>% cbind(lineage = div_groups$group, sample = div_groups$sample) %>% select(-V1)

divHet$lineage = as.factor(divHet$lineage)

divHet$lineage = factor(divHet$lineage, levels = levels(divHet$lineage)[c(2,1,3)])
levels(divHet$lineage) = c("Xm1", "Admixed", "Xm2")

divAOV = welch_anova_test(divHet, het ~ lineage) 
divPH = games_howell_test(divHet, het ~ lineage)

divPH$p.adj
## [1] 0.0000543 0.8120000 0.0000318
hetLetters = data.frame(x = factor(c("Xm1", "Admixed", "Xm2")), y = c(1, 1, 1),  grp = c("a", "b", "a"))

divStat = divAOV$statistic

hetPlotDiv = ggplot(data = divHet, aes(x = lineage, y = het)) +
  geom_violin(aes(fill = lineage, group = lineage), adjust = 1, linewidth = 0.4, color = "black", alpha = 1, trim = TRUE, scale = "area", width = 1.2) +
  geom_boxplot(aes(fill = lineage, group = lineage), width = 0.2, color = "black", fill = "white", outlier.colour = NA, linewidth = 0.4, alpha = 0.5) +  xlab("Lineage") +
  geom_text(data = hetLetters, aes(x = x, y = y, label = grp), size = 4) +
   annotate(geom = "text", x = 2.9, y =0.85, label = sprintf("italic(F) == '%0.2f' * ';' ~ italic(p) < 0.001", divAOV$statistic), size = 3, parse = TRUE) +
  scale_fill_discrete(type = xestoKColPal[c(1, 6, 2)], name = "Lineage") +
  xlab("Lineage") +
  ylab("Heterozygosity") +
  coord_cartesian(expand = TRUE, xlim = c(0.85, 3.15)) +
  theme_bw() + 
  violinTheme

hetPlotDiv

combined plots:

xmDiv = ((p5div) /((pcaPlot1Div + hetPlotDiv + divPlot) + plot_layout(widths = c(3, 2, 1)))) + plot_layout(heights = c(2, 1.5)) +
  # plot_layout(design = layout) +
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 20),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.spacing = unit(-5, "pt"),
        legend.key = element_blank(),
        legend.background = element_blank())
  
# xmDiv

# ggsave("../figures/figure6.png", plot = xmDiv, height = 9, width = 12, units = "in", dpi = 300)
ggsave("../figures/figure6.svg", plot = xmDiv, height = 9, width = 12, units = "in", dpi = 300)

Genetic connectivity

Migration

mainBams = read.delim("../data/bamsMainCluster", header = FALSE) %>% rename("sample" = "V1") 
mainBams$sample = gsub("\\.trim.bt2.bam", "", mainBams$sample)
mainBams$sample = gsub("fk_", "FK", mainBams$sample)
mainBams$sample = gsub("sf_", "SF", mainBams$sample)

xestoMainPops = mainBams %>% left_join(bams) %>% mutate("site" = paste(region,depthZone)) %>% select(sample, site)
## Joining with `by = join_by(sample)`
xestoMainPops$site = gsub(" ", "", xestoMainPops$site)
xestoMainPops$site = gsub("'", "", xestoMainPops$site)
xestoMainPops$sample = gsub("FK", "fk_", xestoMainPops$sample)
xestoMainPops$sample = gsub("SF", "sf_", xestoMainPops$sample)
xestoMainPops$sample = paste(xestoMainPops$sample, ".trim.bt2.bam", sep ="")

write_delim(xestoMainPops, "../data/xestoMainPops", col_names = FALSE)

Checking deviance among model runs from BayesAss we ran on HPC

# fileList = substr(list.files("../data/snps/bayesAss/", "BA3trace.*.txt$"),1,10)
fileList = substr(list.files("../data/bayesass/", "BA3trace.*.txt$"),1,11)

bayesian_deviance <- function(trace, burnin = 0, sampling.interval = 0){
  if(burnin == 0) stop('No burnin specified')
  if(sampling.interval == 0) stop('No sampling interval specified')
  range <- (trace$State > burnin & trace$State %% sampling.interval == 0)
  D <- -2*mean(trace$LogProb[range])
  return(D)
}

baRuns = setNames(data.frame(matrix(ncol = 2, nrow = length(fileList))), c("file", "bD"))

for(i in 1:length(fileList)){
  assign(fileList[i], read.delim(paste("../data/bayesass/", fileList[i], ".txt", sep = ""))) %>% dplyr::select(-last_col()) 
  baRuns$file[i] = fileList[i]
  baRuns$bD[i] = (bayesian_deviance(get(fileList[i]), burnin = 2000000, sampling.interval = 1000))
}

bestBA = baRuns %>% dplyr::filter(bD == min(bD)) %>% select(file) %>% as.list()

#[1] "BA3trace.01 4277982.0575"
#[1] "BA3trace.02 4278000.6675"
#[1] "BA3trace.03 4277371.5925"
#[1] "BA3trace.04 4274620.0525"
#[1] "BA3trace.05 4278321.1175"
#[1] "BA3trace.06 4277084.2950"
#[1] "BA3trace.07 4276766.8200"
#[1] "BA3trace.08 4277712.3800"
#

bestBA
## $file
## [1] "BA3trace.04"

All traces have similar deviance (this is good!). Using the trace with the lowest deviance (BA3trace.04, in this case)

bayesAss = read.delim(paste("../data/bayesass/",bestBA,".txt", sep = "")) %>% filter(State > 2000000) %>% dplyr::select(-State, -LogProb, -X)

baMean = bayesAss %>% summarise(across(everything(), list(mean))) %>% t() %>% as_tibble() %>% rename(., mean=V1) %>% mutate(pops = colnames(bayesAss))

baSumm = bayesAss %>% summarise(across(everything(), list(median))) %>% t() %>% as.data.frame() %>% rename(., median=V1) %>% mutate(pops = baMean$pops, mean = round(baMean$mean, 3)) %>% relocate(median, .after = mean)

baSumm$median = round(baSumm$median, 3)

baHpd =as.data.frame(t(sapply(bayesAss, emp.hpd)))
colnames(baHpd) = c("hpdLow", "hpdHigh")
baHpd$pops = rownames(baHpd)

ESS = as.data.frame(sapply(bayesAss, ESS))
colnames(ESS) = "ESS"

baSumm = baSumm %>% left_join(baHpd)
## Joining with `by = join_by(pops)`
baSumm$hpdLow = round(baSumm$hpdLow, 3)
baSumm$hpdHigh = round(baSumm$hpdHigh, 3)
baSumm$ESS = ESS$ESS

### FROM BAYESASS: ###
## Population Index -> Population Label:
#0->UpperKeysMesophotic 1->UpperKeysShallow
#2->LowerKeysShallow 3->TortugasBankMesophotic
#4->TortugasBankShallow 5->RileysHumpMesophotic 
#6->RileysHumpShallow 7->LowerKeysMesophotic
#8->WestPalmShallow 9->JupiterShallow  
#10->Ft.LauderdaleShallow 11->BoyntonShallow  

popi = rep(c("Upper Keys\nMesophotic", "Upper Keys\nShallow", "Lower Keys\nShallow", "Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "West Palm\nShallow", "Jupiter\nShallow", "Ft. Lauderdale\nShallow", "Boynton\nShallow"), each = 12)

popj = rep(c("Upper Keys\nMesophotic", "Upper Keys\nShallow", "Lower Keys\nShallow", "Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow", "Lower Keys\nMesophotic", "West Palm\nShallow", "Jupiter\nShallow", "Ft. Lauderdale\nShallow", "Boynton\nShallow"), times = 12)

baSumm = baSumm %>% mutate(pop.i = popi, pop.j = popj) %>% relocate(c(pop.i, pop.j), .after = pops) %>% dplyr::select(-pops)

baSumm$pop.i = factor(baSumm$pop.i)
baSumm$pop.i = factor(baSumm$pop.i, levels = levels(baSumm$pop.i)[c(3, 12, 1, 2, 11, 5, 9, 7, 10, 4, 8, 6)])

baSumm$pop.j = factor(baSumm$pop.j)
baSumm$pop.j = factor(baSumm$pop.j, levels = levels(baSumm$pop.j)[c(3, 12, 1, 2, 11, 5, 9, 7, 10, 4, 8, 6)])

baSumm$site.i = word(baSumm$pop.i, 1, sep = "\n")
baSumm$site.i = factor(baSumm$site.i)
baSumm$site.i = factor(baSumm$site.i, levels = levels(baSumm$site.i)[c(3, 8, 1, 2, 7, 4, 6, 5)])

baSumm$site.j = word(baSumm$pop.j, 1, sep = "\n")
baSumm$site.j = factor(baSumm$site.j)
baSumm$site.j = factor(baSumm$site.j, levels = levels(baSumm$site.j)[c(3, 8, 1, 2, 7, 4, 6, 5)])

baSumm$depth.i = word(baSumm$pop.i, 2, sep = "\n")
baSumm$depth.i = factor(baSumm$depth.i)
baSumm$depth.i = factor(baSumm$depth.i, levels = levels(baSumm$depth.i)[c(2, 1)])

baSumm$depth.j = word(baSumm$pop.j, 2, sep = "\n")
baSumm$depth.j = factor(baSumm$depth.j)
baSumm$depth.j = factor(baSumm$depth.j, levels = levels(baSumm$depth.j)[c(2, 1)])
#All sites (excluding self retention)
baMeans = baSumm %>% filter(pop.i != pop.j) %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Global")

#mesophotic sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic Source") %>% bind_rows(baMeans, .)

#shallow sources
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow Source") %>% bind_rows(baMeans, .)

#mesophotic sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.)))  %>% mutate(dataset = "Mesophotic Sink") %>% bind_rows(baMeans, .)

#shallow sinks
baMeans = baSumm %>% filter(pop.i != pop.j, depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.)))  %>% mutate(dataset = "Shallow Sink") %>% bind_rows(baMeans, .)

#mesophotic -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Shallow") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Shallow") %>% bind_rows(baMeans, .)

#mesophotic -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Mesophotic", depth.i == "Mesophotic") %>% summarise(mean = mean(mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Mesophotic -> Mesophotic") %>% bind_rows(baMeans, .)

#shallow -> mesophotic
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Mesophotic") %>% summarise(mean = mean(.$mean), sd = sd(.$mean), se = sd(.$mean)/sqrt(nrow(.))) %>% mutate(dataset = "Shallow -> Mesophotic") %>% bind_rows(baMeans, .)

#shallow -> shallow
baMeans = baSumm %>% filter(pop.i != pop.j, depth.j == "Shallow", depth.i == "Shallow") %>% summarise(mean = round(mean(.$mean), 5), sd = round(sd(.$mean), 5), se = round(sd(.$mean)/sqrt(nrow(.)), 3)) %>% mutate(dataset = paste("Shallow -> Shallow")) %>% bind_rows(baMeans, .) %>% relocate(dataset, .before = mean) %>% as.data.frame()

baMeans[,c(2:4)] = baMeans[,c(2:4)] %>% round(4)

baMeans
##                    dataset   mean     sd     se
## 1                   Global 0.0159 0.0173 0.0015
## 2        Mesophotic Source 0.0158 0.0215 0.0032
## 3           Shallow Source 0.0160 0.0149 0.0016
## 4          Mesophotic Sink 0.0177 0.0176 0.0027
## 5             Shallow Sink 0.0150 0.0172 0.0018
## 6    Mesophotic -> Shallow 0.0163 0.0243 0.0043
## 7 Mesophotic -> Mesophotic 0.0148 0.0115 0.0033
## 8    Shallow -> Mesophotic 0.0188 0.0195 0.0034
## 9       Shallow -> Shallow 0.0143 0.0114 0.0020
baMeansTabPub = baMeans %>%
  flextable() %>%
  flextable::compose(part = "header", j = "dataset", value = as_paragraph("Dataset")) %>%
  flextable::compose(part = "header", j = "mean", value = as_paragraph(as_i("m"))) %>%
  flextable::compose(part = "header", j = "sd", value = as_paragraph("SD")) %>%
  flextable::compose(part = "header", j = "se", value = as_paragraph("SEM")) %>%
  flextable::font(fontname = "Times New Roman", part = "all") %>%
  flextable::fontsize(size = 10, part = "all") %>%
  flextable::bold(part = "header") %>%
  flextable::align(align = "left", part = "all") %>%
  flextable::autofit()

table1 = read_docx()
table1 = body_add_flextable(table1, value = baMeansTabPub)
print(table1, target = "../tables/table1.docx")

baMeansTabPub

Dataset

m

SD

SEM

Global

0.0159

0.0173

0.0015

Mesophotic Source

0.0158

0.0215

0.0032

Shallow Source

0.0160

0.0149

0.0016

Mesophotic Sink

0.0177

0.0176

0.0027

Shallow Sink

0.0150

0.0172

0.0018

Mesophotic -> Shallow

0.0163

0.0243

0.0043

Mesophotic -> Mesophotic

0.0148

0.0115

0.0033

Shallow -> Mesophotic

0.0188

0.0195

0.0034

Shallow -> Shallow

0.0143

0.0114

0.0020

baSumm$mean = sprintf('%.3f', baSumm$mean)
baSumm$mean2 = baSumm$mean
baSumm$hpdLow = sprintf('%.3f', baSumm$hpdLow)
baSumm$hpdHigh = sprintf('%.3f', baSumm$hpdHigh)

baLabs = tibble(pop.i = unique(baSumm$pop.i), pop.j = unique(baSumm$pop.j))

migrateA = ggplot(data = baSumm, aes(pop.i, pop.j))+
  geom_tile(data = subset(baSumm, subset = baSumm$mean2>0.65), fill = "gray35", color = "white") +
  geom_segment(data = baSumm, aes(x = 0.4755, xend = -0.55, y = pop.j, yend = pop.j, color = pop.j), size = 16) +
  geom_segment(data = baSumm, aes(x = pop.i, xend = pop.i, y = 0.45, yend = -0.425, color = pop.i), size = 32) +
  scale_color_manual(values = flPal[c(1:8, 5:8)], guide = NULL) +
  guides(fill = guide_colorbar(ticks.colour = "black", barwidth = 1, barheight = 10, frame.colour = "black")) +
  geom_tile(data = subset(baSumm, subset = baSumm$mean<0.65), aes(fill = as.numeric(as.character(mean))), color = "white") +
  scale_fill_gradientn(colours = paletteer_c("viridis::mako", n = 10, direction = -1)[c(1:7)], limit = c(0,0.16), space = "Lab", name = expression(paste(italic("m"))), na.value = "transparent",  guide = "colourbar", values = c(0, 0.05, 0.1, 0.15, 0.2,0.5,0.75,1)) +
  geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste(mean, "\n", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), fontface = ifelse(as.numeric(baSumm$hpdLow)>0, "bold", "plain"), size = ifelse(as.numeric(baSumm$hpdLow)>0, 4.75, 4)) +
  geom_text(data = baSumm, aes(x = pop.i, y = pop.j, label = paste("\n(",hpdLow,"–",hpdHigh, ")", sep = "")), color = ifelse(baSumm$mean > 0.6, "white", "gray5"), size = 3.25) +
  geom_text(data = (baLabs %>% filter(pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#FFFFFF", family = "sans") +
  geom_text(data = (baLabs %>% filter(!pop.j %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), x = -.02, aes(y = pop.j, label = pop.j), size = 3.75, color = "#000000", family = "sans") +
  geom_text(data = (baLabs %>% filter(pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#FFFFFF", family = "sans") +
  geom_text(data = (baLabs %>% filter(!pop.i %in% c("Tortugas Bank\nMesophotic", "Tortugas Bank\nShallow", "Riley's Hump\nMesophotic", "Riley's Hump\nShallow"))), y = -.01, aes(x = pop.i, label = pop.i), size = 3.75, color = "#000000", family = "sans") +
  
  labs(x = "Sink", y = "Source") +
  scale_y_discrete(limits = rev(levels(baSumm$pop.i))[c(1:12)], position = "left") +
  coord_cartesian(xlim = c(1, 12), ylim = c(1, 12), clip = "off") +
  theme_minimal()

migrate = migrateA + theme(
  axis.text.x = element_text(vjust = 1, size = 12, hjust = 0.5, color = NA),
  axis.text.y = element_text(size = 10, color = NA),
  axis.title.x = element_text(size = 14),
  axis.title.y = element_text(size = 14),
  panel.grid.major = element_blank(),
  axis.ticks = element_blank(),
  # legend.position = c(1.055, 0.5),
  legend.direction = "vertical",
  legend.title = element_text(size = 12, face = "bold")
)

# migrate

ggsave("../figures/figure7.png", plot = migrate, width = 36, height = 18, units = "cm", dpi = 300)

Main cluster

maMain = as.matrix(read.table("../data/xestoMain.ibsMat"))

bamsMain = read.delim("../data/pcangsd/mainCluster", header = FALSE) %>% rename("sample" = "V1") %>% left_join(bams) %>%  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM)
## Joining with `by = join_by(sample)`
dimnames(maMain) = list(bamsMain[,1],bamsMain[,1])

pcAdmixMain = read.table("../data/admix/main/K3main.output") %>% dplyr::select(V6, V7, V8) %>% dplyr::rename("Xm1" = "V6", "Xm1.2" = "V7", "Xm1.3" = "V8") %>% cbind(bamsMain)

pcAdmixMainMelt = melt(pcAdmixMain, id.vars = c("sample", "site", "depth", "depthm"))

trMain = hclust(dist(maMain),"ave") 

  p1Main = ggtree(trMain, layout = "rectangular") #+ ggtree::geom_tiplab()

  p2Main = facet_plot(p1Main, panel = "location", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = site), size = 0.1, color = "grey25") + 
    scale_fill_manual("Site", values = flPal, guide = guide_legend(ncol = 1, order = 1)) + 
    theme(legend.key.size = unit(.8, "line")) +
    new_scale_fill() 

  p3Main = facet_plot(p2Main, panel = "depth zone", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = depth), size = 0.1, color = "grey25") +
    scale_fill_manual("Depth zone", values = c("#A7E1BCFF", "#414083FF"), guide = guide_legend(ncol = 1, order = 2)) +
    new_scale_fill()
  
  p4Main = facet_plot(p3Main, panel = "depth", data=bamsMain, geom = geom_tile, mapping = aes(x = 1, fill = depthm), size = 0.1, color = "grey25") + 
    scale_fill_viridis_c("Depth (m)", option = "mako", direction = -1, guide = guide_colorbar(order = 2, direction = "horizontal")) +
    theme(legend.title.position = "top") +
    new_scale_fill()

  p5Main = facet_plot(p4Main, panel="K = 2", data = pcAdmixMainMelt, geom=geom_barh, width = 1.0, mapping = aes(x = value, fill = variable), 
                stat ='identity', size = 0.15, color = "grey25") +
    scale_fill_manual("Lineage",values = xestoKColPal[c(1,3,4)], guide = guide_legend(ncol = 1, order = 4)) + 
    theme(strip.text = element_blank(),
          #legend.direction = "horizontal",
          legend.text = element_text(size = 10),
          legend.title = element_text(size = 10),
          legend.key = element_blank(),
          legend.margin = margin(c(0,0,0,0),unit = "pt"))

p5Main 

mainTree =  facet_widths(p5Main, widths = c(0.25, 0.025, 0.025, 0.025, 0.25))
covMain = read.table("../data/pcangsd/flXestoMainPcangsd.cov") %>% as.matrix()

pcAdmixMain2 = read.table("../data/admix/main/K3main.output") %>% dplyr::select(V6, V7, V8)

pcAdmixMain2 = pcAdmixMain2 %>% dplyr::rename("Xm1" = "V6", "Xm1.2" = "V7", "Xm1.3" = "V8")
  
pcaEigMain = eigen(covMain)
xestoMainPcaVar = pcaEigMain$values/sum(pcaEigMain$values)*100
head(xestoMainPcaVar)
## [1] 3.098399 2.275836 1.576540 1.541056 1.439801 1.368856
pcangsdMain = read.delim("../data/pcangsd/mainCluster", header = FALSE) %>% rename("sample" = "V1") %>% left_join(bams) %>%  dplyr::select(sample, "site" = region, "depth" = depthZone, "depthm" = depthM) %>% cbind(pcAdmixMain2)
## Joining with `by = join_by(sample)`
pcangsdMain$sitedepth = as.factor(paste(pcangsdMain$site, pcangsdMain$depth, sep = " "))

pcangsdMain$depth = factor(pcangsdMain$depth)
pcangsdMain$depth = factor(pcangsdMain$depth, levels = levels(pcangsdMain$depth)[c(2,1)])

pcangsdMain$sitedepth = factor(pcangsdMain$sitedepth)
pcangsdMain$sitedepth = factor(pcangsdMain$sitedepth, levels(pcangsdMain$sitedepth)[c(3, 12, 1, 2, 11, 10, 5, 4, 9, 8, 7, 6)])


pcangsdMain$depth = factor(pcangsdMain$depth)
pcangsdMain$depth = factor(pcangsdMain$depth, levels(pcangsdMain$depth)[c(2, 1)])


pcangsdMain$PC1 = pcaEigMain$vectors[,1]
pcangsdMain$PC2 = pcaEigMain$vectors[,2]
pcangsdMain$PC3 = pcaEigMain$vectors[,3]
pcangsdMain$PC4 = pcaEigMain$vectors[,4]


pcangsdClustMain = pcAdmixMain2 %>% mutate(cluster = ifelse(Xm1 >=0.75, "Xm1", ifelse(Xm1.2 >= 0.75, "Xm1.2", ifelse(Xm1.3 >= 0.75, "Xm1.3", "Admixed"))))


pcangsdMain = pcangsdMain %>% mutate(cluster = pcangsdClustMain$cluster)

pcangsdMain = merge(pcangsdMain, aggregate(cbind(mean.x = PC1, mean.y = PC2)~sitedepth, pcangsdMain, mean), by="sitedepth")

pcangsdMain$cluster = factor(pcangsdMain$cluster)
pcangsdMain$cluster = factor(pcangsdMain$cluster, levels = levels(pcangsdMain$cluster)[c(2,3,4,1)])

pcaPlotMainSA = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.25) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.25) +
  geom_point(data = pcangsdMain, aes(x = PC1, y = PC2, fill = site, shape = depth), color = "black", stroke = 0.25, size = 4, alpha = 0.5, show.legend = FALSE) +
  geom_point(data = pcangsdMain, aes(x = mean.x, y = mean.y, fill = site, shape = depth), color = "black", size = 5, alpha = 1, stroke = 0.5) +
  scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
  scale_fill_manual(values = flPal, name = "Site") +
  scale_color_manual(values = flPal, name = "Site") +
  labs(x = paste0("PC 1 (", format(round(xestoMainPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoMainPcaVar[2], 1), nsmall = 1), " %)")) +
  guides(shape = guide_legend(override.aes = list(size = 4, stroke = 0.25, alpha = NA), order = 2, ncol = 1), fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = flPal, alpha = NA), order = 1, ncol = 1)) +
  theme_bw()

pcaPlotMainS = pcaPlotMainSA +
  pcaTheme +
  theme(legend.position = c(0.15, 0.24))

pcaPlotMainS

pcaPlotMainK3A = ggplot() +
  geom_hline(yintercept = 0, color = "gray90", size = 0.5) +
  geom_vline(xintercept = 0, color = "gray90", size = 0.5) +
  geom_point(data = pcangsdMain, aes(x = PC1, y = PC2, fill = cluster, shape = depth), color = "black", size = 4, alpha = 1, show.legend = TRUE) +
  scale_shape_manual(values = c(21, 23), name = "Depth Zone") +
  scale_fill_manual(values = xestoKColPal[c(1,3,4,6)], name = "Lineage") +
  labs(x = paste0("PC 1 (", format(round(xestoMainPcaVar[1], 1), nsmall = 1)," %)"), y = paste0("PC 2 (", format(round(xestoMainPcaVar[2], 1), nsmall = 1), " %)")) +
  guides(shape = "none", fill = guide_legend(override.aes = list(shape = 22, size = 4, fill = xestoKColPal[c(1,3,4,6)], alpha = NA), order = 1, ncol = 1))+
  theme_bw()

pcaPlotMainK3 = pcaPlotMainK3A +
  pcaTheme +
  theme(legend.position = c(0.12, 0.15))

pcaPlotMainK3

pcAdmixMain3 = arrange(pcAdmixMain, site, depth, -Xm1, -Xm1.2)
popCounts = pcAdmixMain3 %>% group_by(site, depth) %>% summarise(n = n())
## `summarise()` has grouped output by 'site'. You can override using the `.groups`
## argument.
pcAdmixMain3$site = factor(pcAdmixMain3$site, levels = rev(levels(pcAdmixMain3$site)))

popCounts
## # A tibble: 12 × 3
## # Groups:   site [8]
##    site           depth          n
##    <fct>          <fct>      <int>
##  1 Jupiter        Shallow       27
##  2 West Palm      Shallow       29
##  3 Boynton        Shallow       28
##  4 Ft. Lauderdale Shallow        9
##  5 Upper Keys     Shallow       27
##  6 Upper Keys     Mesophotic    28
##  7 Lower Keys     Shallow       28
##  8 Lower Keys     Mesophotic    30
##  9 Tortugas Bank  Shallow       29
## 10 Tortugas Bank  Mesophotic    15
## 11 Riley's Hump   Shallow       26
## 12 Riley's Hump   Mesophotic    24
popCountList = reshape2::melt(lapply(popCounts$n,function(x){c(1:x)}))
pcAdmixMain3$ord = popCountList$value

pcAdmixMain3Melt = melt(pcAdmixMain3, id.vars=c("sample", "site", "depth", "depthm", "ord"), variable.name="Ancestry", value.name="Fraction")

pcAdmixMain3Melt$Ancestry = factor(pcAdmixMain3Melt$Ancestry)
pcAdmixMain3Melt$Ancestry = factor(pcAdmixMain3Melt$Ancestry, levels = rev(levels(pcAdmixMain3Melt$Ancestry)))

popAnno = data.frame(x1 = c(0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5), x2 = c(30.5, 30.5, 30.5, 30.5, 30.5, 30.5, 30.5, 30.5),
                     y1 = -.1, y2 = -.1, sample = NA, Ancestry = NA, depth = "Shallow", 
                     ord  = NA, Fraction = NA,
                     site = c("Riley's Hump", "Tortugas Bank", 
                                  "Lower Keys", "Upper Keys", "Ft. Lauderdale", "Boynton", "West Palm", "Jupiter"))

popAnno$site = factor(popAnno$site)
popAnno$site = factor(popAnno$site, levels = levels(popAnno$site)[rev(c(3, 8, 1, 2, 7, 4, 6, 5))])

Make admixture plots

mainAdmixPlotA = ggplot(data = pcAdmixMain3Melt, aes(x = ord, y = Fraction, fill = Ancestry, order = sample)) +
  geom_segment(data = popAnno, aes(x = x1, xend = x2, y = -1.11, yend = -1.11, color = site), size = 7) +
  geom_bar(stat = "identity", position = "fill", width = 1, colour = "grey25", size = 0.2) +
  facet_grid(factor(depth) ~ site, switch = "both") +
  geom_text(data = (pcAdmixMain3Melt %>% filter(depth == "Mesophotic", site %in% c("Riley's Hump", "Tortugas Bank"), sample %in% c("FKX047", "FKX070"), Ancestry == "Xm1")), x = 15.5, y = -.1, aes(label = site), size = 4, color = "#FFFFFF") +
  geom_text(data = (pcAdmixMain3Melt %>% filter(depth == "Shallow", !site %in% c("Riley's Hump", "Tortugas Bank"), sample %in% c("FKX148", "FKX208", "SFX069", "SFX093", "SFX021","SFX035"), Ancestry == "Xm1")), x = 15.5, y = -1.11, aes(label = site), size = 4, color = "#000000") +
  scale_fill_manual(values = xestoKColPal[c(1,3,4)]) +
  scale_color_manual(values = rev(flPal)) +
  scale_x_discrete(expand = c(0.005, 0.005)) +
  scale_y_continuous(expand = c(0.001, 0.001)) +
  coord_cartesian(ylim = c(0.0, 1.0), clip = "off") +
theme_bw()
  
mainAdmixPlot = mainAdmixPlotA + 
  theme_bw()+
  theme(
  panel.grid = element_blank(),
  plot.background = element_blank(),
  panel.border = element_rect(fill = NA, color = "black", size = 0.75, linetype = "solid"),
  panel.spacing.x = grid:::unit(0.05, "lines"),
  panel.spacing.y = grid:::unit(0.05, "lines"),
  panel.background = element_blank(),
  axis.text.x = element_blank(),
  axis.text.y = element_blank(),
  axis.ticks.x = element_blank(),
  axis.ticks.y = element_blank(),
  axis.title = element_blank(),
  strip.background.x = element_blank(),
  strip.background.y = element_blank(),
  strip.text = element_text(size = 8),
  strip.text.y.left = element_text(size = 10, angle = 90),
  strip.text.x.bottom = element_text(vjust = 1, color = NA),
  legend.key = element_blank(),
  legend.position = "none",
  legend.title = element_text(size = 8))

mainAdmixPlot

mainPlots = (mainTree)/((pcaPlotMainS + pcaPlotMainK3 + guide_area()) +
  plot_layout(widths = c(1, 1,0.25), guides = "collect"))/(mainAdmixPlot) +
  plot_layout(heights = c(2.5, 1.75, 2)) +
  
  plot_annotation(tag_levels = 'A') &
  theme(plot.tag = element_text(size = 20),
        legend.text = element_text(size = 10),
        legend.title = element_text(size = 10),
        legend.background = element_blank(),
        legend.position = )

mainPlots

ggsave("../figures/figureS1.png", plot = mainPlots, height = 14, width = 12, units = "in", dpi = 300)

save.image("flXesto.RData")